home *** CD-ROM | disk | FTP | other *** search
- /* @(#)src/hash.c 1.5 7/11/92 11:49:16 */
-
- /*
- * Copyright (C) 1987, 1988 Ronald S. Karr and Landon Curt Noll
- * Copyright (C) 1992 Ronald S. Karr
- *
- * See the file COPYING, distributed with smail, for restriction
- * and warranty information.
- */
-
- /*
- * hash:
- * perform a string hashing algorithm functions
- *
- * Hash tables are defined by their size. It is suggested that the
- * size be a prime value, say:
- * 13, 29, 47, 61, 113, 181, 251, 359, 509, 751, 1021, 1511,
- * 2039, 3079, 4093, 6151, 8179, 12377, 16381, 24571, 32749,
- * 49139, 65557, 98299, 132049, 180023, 216091, 321367, 521539, ...
- * An advantage with the above primes is that they are at last 3 less
- * than 2^n, and no closer than 3 away from 3*(2^m) for the larger
- * values of m. Most malloc systems are most efficient when one allocates
- * 3 words less than 2^n bytes, i.e., 4*(2^n - 3) bytes.
- *
- * external functions:
- * add_to_hash, lookup_in_hash, delete_from_hash,
- * delete_from_hash, store_in_hash, new_hash_table,
- * write_hash_table, walk_hash, free_hash_element,
- * free_hash_table
- */
- #include <stdio.h>
- #include <ctype.h>
- #include "smail.h"
- #include "defs.h"
- #include "exitcodes.h"
- #include "hash.h"
- #include "alloc.h"
- #ifndef DEPEND
- # include "extern.h"
- # include "debug.h"
- #endif
-
- /* static functions used in this file */
- static int hash_str();
- static int hash_stric();
- static struct hash *new_hash_element();
-
- #ifdef STANDALONE
- int debug = 0; /* default debug level is NONE */
- #define errfile stderr
- void panic();
- #define xmalloc(x) (malloc(x)) /* naive allocator */
- #define bmalloc(x,y) (malloc(x)) /* naive allocator */
- char *malloc();
- #define xfree(x) (free(x)) /* naive deallocator */
- #define bfree(x,y) (free(x)) /* naive deallocator */
- void free();
- #endif /* STANDALONE */
-
- /*
- * ETHEREAL_HASHDATA -
- *
- * This file contains a nearly complete implementation of a general
- * hashing system. However it is likely that smail itself will
- * never need to delete, replace, or store new data over old data.
- * That is, smail will only create, add, lookup and perform hash
- * table file operations.
- *
- * The code ifdef-ed out does work and is tested by the STANDALONE code.
- * You should define ETHEREAL_HASHDATA if you want to use it
- */
-
-
- /*
- * hash_str - convert a trying into a hash value
- *
- * We build the hash value one character at a time by repeating the following
- * steps for each character:
- *
- * 1) The previous value is shifted up (by HASH_UP_SHIFT) and the
- * current character is added
- * 2) any upper level excess bits are fetched (by masking with
- * {H,L}_HASH_MASK) and xor-ed into bits near the bottom
- * 3) the upper level excess bits are cleared
- *
- * In the end, the hash value is taken modulo `mod' to produce a slot number.
- *
- * input:
- * str - string to hash
- * mod - number of hash table entries
- * output:
- * the slot number on which `str' belongs
- *
- * NOTE: For more optimal hashing of smaller hash tables (entries < HASH_LEVEL)
- * we L_ value constants. This gives us a faster hash fold. For larger
- * hash tables, this is not needed so H_ value constants are used.
- *
- * NOTE: mod should be a prime <= ~H_HASH_MASK
- */
- static int
- hash_str(str, mod)
- register char *str; /* the string to hash */
- int mod; /* prime modulus, size of hash table */
- {
- register unsigned long val; /* current hash value */
- register unsigned long excess; /* upper/excess bits in val */
- register unsigned long c; /* the current string character */
-
- /* firewall - bogus case, but be safe anyway */
- if (str == NULL) {
- return 0;
- }
-
- /*
- * hash each character in the string
- *
- * if our mod is small enough, then use L_ value constants so that
- * strings can fold into themselves faster.
- */
- if (mod < HASH_LEVEL) {
- /* hash each character using the L_ values */
- for (val = 0; c=(unsigned long)*str; ++str) {
- val = (val << HASH_UP_SHIFT) + c;
- val ^= ((excess = (val&L_HASH_MASK)) >> L_DOWN_SHIFT);
- val ^= excess;
- }
- } else {
- /* hash each character using the H_ values */
- for (val = 0; c=(unsigned long)*str; ++str) {
- val = (val << HASH_UP_SHIFT) + c;
- val ^= ((excess = (val&H_HASH_MASK)) >> H_DOWN_SHIFT);
- val ^= excess;
- }
- }
-
- return (int)(val%mod); /* our hash value, mod the hash size */
- }
-
- /*
- * hash_stric - convert a trying into a hash value without regard to case
- *
- * We build the hash value one character at a time by repeating the following
- * steps for each character:
- *
- * 1) The previous value is shifted up (by HASH_UP_SHIFT) and the
- * current character is added
- * 2) any upper level excess bits are fetched (by masking with
- * {H,L}_HASH_MASK) and xor-ed into bits near the bottom
- * 3) the upper level excess bits are cleared
- *
- * In the end, the hash value is taken modulo `mod' to produce a slot number.
- *
- * input:
- * str - string to hash without regard to case
- * mod - number of hash table entries
- * output:
- * the slot number on which `str' belongs
- *
- * NOTE: For more optimal hashing of smaller hash tables (entries < HASH_LEVEL)
- * we L_ value constants. This gives us a faster hash fold. For larger
- * hash tables, this is not needed so H_ value constants are used.
- *
- * NOTE: mod should be a prime <= ~H_HASH_MASK
- */
- static int
- hash_stric(str, mod)
- register char *str; /* the string to hash disregarding case */
- int mod; /* prime modulus, size of hash table */
- {
- register unsigned long val; /* current hash value */
- register unsigned long excess; /* upper/excess bits in val */
- register unsigned long c; /* the current string character */
-
- /* firewall - bogus case, but be safe anyway */
- if (str == NULL) {
- return 0;
- }
-
- /*
- * hash each character in the string
- *
- * if our mod is small enough, then use L_ value constants so that
- * strings can fold into themselves faster.
- */
- if (mod < HASH_LEVEL) {
- /* hash each character using the L_ values */
- for (val = 0; c=(unsigned long)lowercase(*str); ++str) {
- val = (val << HASH_UP_SHIFT) + c;
- val ^= ((excess = (val&L_HASH_MASK)) >> L_DOWN_SHIFT);
- val ^= excess;
- }
- } else {
- /* hash each character using the H_ values */
- for (val = 0; c=(unsigned long)lowercase(*str); ++str) {
- val = (val << HASH_UP_SHIFT) + c;
- val ^= ((excess = (val&H_HASH_MASK)) >> H_DOWN_SHIFT);
- val ^= excess;
- }
- }
-
- return (int)(val%mod); /* our hash value, mod the hash size */
- }
-
-
- #ifdef ETHEREAL_HASHDATA
- /*
- * walk_hash - walk thru a hash table
- *
- * returns NULL if there is no next element in the hash table
- *
- * input:
- * cur - our current location, or NULL to start at the beginning
- * table - the table to be walked
- * output:
- * a pointer to the next element, or NULL if no next element
- *
- * WARNING: results will be unpredictable or fatal if `cur' != NULL, and
- * `cur' != `the previous walk location', and `cur' is an element
- * that has been either deleted or replaced by another element.
- * It should be noted that this `cur' will never be the `previous
- * walk location' if our previous call ran off the end of the table.
- */
- struct hash *
- walk_hash(cur, table)
- struct hash *cur; /* where we are now */
- struct hash_table *table; /* hash table being walked */
- {
- register struct hash *prev; /* our previous walk location */
- register int indx; /* our previous hash slot */
- register int len; /* the table length */
-
- /*
- * firewall
- */
- if (table == NULL) {
- panic(EX_SOFTWARE, "walk_hash: table is NULL");
- }
- /* fetch these values for faster use */
- prev = table->prev;
- indx = table->indx;
- len = table->len;
-
- /*
- * find the first hash slot if cur is NULL (due to a restart request)
- */
- if (cur == NULL) {
- /* note that we really don't have a location yet */
- prev = NULL;
-
- /* find the first slot and return it */
- for (indx=0; indx < len; ++indx) {
- if (full_slot(table->slot[indx])) {
- /* we found the first entry of the first non-empty chain */
- prev = table->slot[indx];
- break;
- }
- }
-
- /*
- * walk from our current location to the next location
- */
- } else {
-
- /*
- * if `cur' is not the previous `cur', then find `cur' and
- * note where our hash table index now resides
- */
- if (cur != prev) {
- /* find the hash table index */
- indx = hash_string(cur->keystr, len, table->flags&HASH_CASEFOLD);
- /* if `cur' is an empty slot, panic */
- if (empty_slot(table->slot[indx])) {
- panic(EX_SOFTWARE, "walk_hash: <%s> hash slot is empty",
- cur->keystr);
- }
-
- /* walk down the hash table chain looking for our entry */
- for (prev = table->slot[indx];
- cur != prev && prev != NULL;
- prev = next_hash(prev,table)) {
- }
- }
- /* if `cur' is not in the hash table, panic */
- if (prev == NULL) {
- panic(EX_SOFTWARE, "walk_hash: <%s> is not in table", cur->keystr);
- }
-
- /*
- * if we were at the end of a chain, then our successor will
- * be the start of the next non-empty chain
- */
- if ((prev = next_hash(prev,table)) == NULL) {
- /* find the next non-empty chain */
- for (++indx; indx < len; ++indx) {
- if (full_slot(table->slot[indx])) {
- /* return first element of this chain */
- prev = table->slot[indx];
- break;
- }
- }
- }
- }
-
- /*
- * return the pointer the next element or NULL
- */
- /* remember our location for next time */
- table->prev = hash_addr(prev, table);
- table->indx = indx;
- return table->prev;
- }
- #endif /* ETHEREAL_HASHDATA */
-
-
- /*
- * new_hash_element - creat a new hash element
- *
- * return a malloced new hash element with the lengths correctly filled out
- *
- * inputs:
- * keystr - the key of this data element
- * data - the data to accompany `keystr', or NULL if no data
- * datalen - the length of `data' in bytes, or 0 is no data
- * table - the hash table which will get this element
- */
- static struct hash *
- new_hash_element(keystr, data, datalen, table)
- char *keystr; /* the keystring to be added */
- char *data; /* the associated data if any */
- int datalen; /* length of data, 0 ==> no data */
- struct hash_table *table; /* hash table being added to */
- {
- struct hash *new; /* the new slot chain location */
- int keylen; /* the length of the string, padded */
- unsigned long lk; /* temp var for key length */
-
- /*
- * firewall - check for bad pointers and values
- */
- if (keystr == NULL || table == NULL) {
- panic(EX_SOFTWARE, "new_hash_element: NULL keystring or table");
- }
- lk = keystr_len(keystr); /* compute padded key length */
- if (lk >= (unsigned long)(1L<<BITS_PER_SHORT)) {
- panic(EX_SOFTWARE, "new_hash_element: key too long");
- }
- keylen = (int)lk; /* now we know it will fit in an int */
- /* firewall - check against bad data being passed to us */
- if (datalen < 0 || (datalen > 0 && data == NULL) ||
- (unsigned long)datalen >= (unsigned long)(1L<<BITS_PER_SHORT)) {
- panic(EX_SOFTWARE,
- "new_hash_element: bad data passed with: <%s> datalen: <%d>",
- keystr, datalen);
- }
-
- /*
- * malloc the storage
- */
- new = (struct hash *)bmalloc( hashslot_size(keylen,datalen), table->life );
- /* firewall */
- if (is_odd(new)) {
- panic(EX_SOFTWARE,
- "new_hash_element: malloc returned odd address: %lx",
- (long)new);
- }
-
- /*
- * return the prebuild element
- */
- new->succ = NULL;
- new->keylen = keylen;
- strcpy(new->keystr, keystr);
- new->datalen = datalen;
- if (datalen > 0) {
- memcpy(hash_data(new), data, datalen);
- }
- return new;
- }
-
- #ifdef ETHEREAL_HASHDATA
- /*
- * free_hash_element - free an old hash element
- *
- * Frees a hash table element according to the life of the hash table.
- * Removes the hash table element if it in the hash table unless explicitly
- * told that the element is not in the table.
- *
- * inputs:
- * cur - the element which which we will free
- * search - non-zero means delete `cur' from table prior to the free
- * table - the table on which `cur' resides, or the table to which
- * `cur' would have been added
- *
- * WARNING: It is important that the `cur' element NOT be in a hash table
- * after the free. Unpredictable results will happen otherwise.
- * If `search' is non-zero, we will first attempt to delete `cur'
- * from `table'. It is a fatal error if `search' is non-zero and
- * `cur' is not in `table'.
- *
- * WARNING: It is important that `cur' was individually malloced (perhaps
- * by new_hash_element) so that the free of its address will
- * be valid.
- */
- void
- free_hash_element(cur, search, table)
- struct hash *cur; /* what we will delete */
- int search; /* non-zero => delete `cur' first */
- struct hash_table *table; /* table `cur' does/would_have belonged to */
- {
- /*
- * firewall - check for bad pointers and values
- */
- if (cur == NULL || table == NULL) {
- panic(EX_SOFTWARE, "free_hash_element: NULL cur or table");
- }
-
- /*
- * delete the element first if requested
- */
- if (search != 0 && delete_from_hash(cur->keystr, table) == NULL) {
- panic(EX_SOFTWARE,"free_hash_element: <%s> not in table",cur->keystr);
- }
-
- /*
- * free the storage
- */
- bfree(cur, table->life);
- return;
- }
- #endif /* ETHEREAL_HASHDATA */
-
- /*
- * new_hash_table - creat a new hash table
- *
- * return a malloced new hash table with correctly setup initial pointers
- *
- * input:
- * tablelen - number of slots in the hash table
- * life - the alloc block to which this is to be associated, or NULL
- * meaning the permanent block
- * output:
- * a pointer to a malloced empty hash table
- */
- struct hash_table *
- new_hash_table(tablelen, life, flags)
- int tablelen; /* number of slots in the hash table */
- struct block *life; /* is the hash table permanent or temporary */
- int flags; /* hash table flag as per struct hash_table */
- {
- register int i; /* index */
- struct hash_table *table; /* the malloced hash table */
-
- /*
- * firewalls
- */
- if (tablelen <= 0) {
- panic(EX_SOFTWARE, "new_hash_table: tablelen: %d", tablelen);
- }
- DEBUG3(DBG_HASH_LO,
- "new_hash_table: tablelen:%d life:%d flag:%d\n",tablelen,life,flags);
-
- /*
- * malloc the hash table
- */
- table = (struct hash_table *)bmalloc(table_size(tablelen), life);
- /* firewall */
- if (is_odd(table)) {
- panic(EX_SOFTWARE,
- "new_hash_table: malloc returned odd address: %d", (int)table);
- }
-
- /*
- * initialize the table
- */
- table->len = tablelen;
- table->flags = flags & HASH_FLAGMASK;
- table->life = life;
- table->prev = NULL; /* no current walk_hash() location */
- table->indx = 0; /* no current walk_hash() slot index */
- for (i=0; i < tablelen; i++) {
- table->slot[i] = NULL;
- }
- return table; /* return our new table */
- }
-
- #ifdef ETHEREAL_HASHDATA
- /*
- * free_hash_table - free a hash table and its associated data
- *
- * Free all storage associated with a hash table.
- *
- * NOTE: any malloced elements should be freed prior to calling this routine.
- *
- * input:
- * table - the hash table to free
- */
- void
- free_hash_table(table)
- struct hash_table *table; /* the hash table to free */
- {
- struct hash *cur; /* current element to delete */
-
- /*
- * firewalls
- */
- if (table == NULL ) {
- panic(EX_SOFTWARE, "free_hash_table: NULL table");
- }
- DEBUG(DBG_HASH_LO,"free_hash_table: start\n");
-
- /*
- * free the table slots
- */
- bfree(table, table->life);
- return;
- }
- #endif /* ETHEREAL_HASHDATA */
-
-
- /*
- * add_to_hash - add an element to the a hash table
- *
- * inputs:
- * keystr - the key of the data to add
- * data - the data to accompany `keystr', or NULL if no data
- * datalen - the length of `data' in bytes, or 0 if no data
- * table - a pointer to the hash table which is being modified
- * output:
- * returns ALREADY_HASHED if `keystr' is already in the `table', or
- * JUST_HASHED if we just added a no key. The `table' is not modified
- * if the key is already exists.
- */
- int
- add_to_hash(keystr, data, datalen, table)
- char *keystr; /* the keystring to be added */
- char *data; /* the associated data if any */
- int datalen; /* length of data, 0 ==> no data */
- struct hash_table *table; /* the hash table to add it to */
- {
- register struct hash *cur; /* the current slot chain location */
- register struct hash *prev; /* the previous slot chain location */
- register int cmp; /* -1, 0, or 1 for < = > compare */
- register int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- int loc; /* the hash slot to add onto */
- struct hash *new; /* the new slot chain location */
-
- /*
- * firewall - watch for NULLs
- */
- if (keystr == NULL) {
- panic(EX_SOFTWARE, "add_to_hash: NULL keystr");
- }
- if (table == NULL) {
- panic(EX_SOFTWARE, "add_to_hash: NULL table");
- }
-
- /*
- * determine the slot on which this entry is to be added
- */
- caseflag = table->flags & HASH_CASEFOLD;
- loc = hash_string(keystr, table->len, caseflag);
- DEBUG2(DBG_HASH_LO, "add_to_hash: keystr: <%s> slot: %d\n", keystr, loc);
-
- /*
- * search the slot chain for our entry
- */
- /* special case for empty slot chains */
- if (empty_slot(table->slot[loc])) {
- DEBUG(DBG_HASH_MID, "add_to_hash: insert in NULL slot\n");
- new = new_hash_element(keystr, data, datalen, table);
- insert_hash(&table->slot[loc], new);
- return JUST_HASHED;
- }
-
- /*
- * search the chain
- */
- DEBUG2(DBG_HASH_VHI, "add_to_hash: slot:0x%lx cur:0x%lx\n",
- (long)table->slot[loc], (long)table->slot[loc]);
- for (prev=NULL, cur=table->slot[loc];
- cur != NULL; prev=cur, cur=next_hash(cur,table)) {
- /*
- * if we found the entry, stop
- */
- DEBUG2(DBG_HASH_VHI, "add_to_hash: comparing <%s> to <%s>",
- keystr, cur->keystr);
- if ((cmp = stringcmp(keystr, cur->keystr, caseflag)) == 0) {
- DEBUG(DBG_HASH_MID, "add_to_hash: already hashed\n");
- return ALREADY_HASHED;
-
- /*
- * we are past the insertion point, insert before here and stop
- * note if we are inserting at the beginning a a chain or in the middle
- */
- } else if (cmp < 0) {
- new = new_hash_element(keystr, data, datalen, table);
- if (prev == NULL) {
- DEBUG(DBG_HASH_MID, "add_to_hash: insert at front\n");
- insert_hash(&table->slot[loc], new); /* insert at beginning */
- } else {
- DEBUG(DBG_HASH_MID, "add_to_hash: insert in middle\n");
- insert_hash(prev, new); /* insert in middle */
- }
- return JUST_HASHED;
- }
- }
-
- /*
- * case: insertion at the end of the chain
- */
- DEBUG(DBG_HASH_MID, "add_to_hash: insert at END\n");
- new = new_hash_element(keystr, data, datalen, table);
- insert_hash(prev, new);
- return JUST_HASHED;
- }
-
- #ifdef ETHEREAL_HASHDATA
- /*
- * replace_in_hash - replace an existing element in a hash table
- *
- * inputs:
- * keystr - the key of the data to replace
- * data - the data to accompany `keystr', or NULL if no data
- * datalen - the length of `data' in bytes, or 0 if no data
- * table - a pointer to the hash table which is being modified
- * output:
- * returns a pointer to the element that was replaced, or NULL
- * if no element was replaced due to `keystr' not in `table'
- */
- struct hash *
- replace_in_hash(keystr, data, datalen, table)
- char *keystr; /* the keystring to replace */
- char *data; /* the associated data if any */
- int datalen; /* length of data, 0 ==> no data */
- struct hash_table *table; /* the hash table to add it to */
- {
- register struct hash *cur; /* the current slot chain location */
- register struct hash *prev; /* the previous slot chain location */
- register int cmp; /* -1, 0, or 1 for < = > compare */
- register int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- int loc; /* the hash slot to add onto */
- struct hash *new; /* the new slot chain location */
-
- /*
- * firewall - watch for NULLs
- */
- if (keystr == NULL) {
- panic(EX_SOFTWARE, "replace_in_hash: NULL keystr");
- }
- if (table == NULL) {
- panic(EX_SOFTWARE, "replace_in_hash: NULL table");
- }
-
- /*
- * determine the slot on which this entry is to be added
- */
- caseflag = table->flags & HASH_CASEFOLD;
- loc = hash_string(keystr, table->len, caseflag);
- DEBUG2(DBG_HASH_LO, "replace_in_hash: keystr: <%s> slot: %d\n",keystr,loc);
-
- /*
- * search the slot chain for our entry
- */
- /* special case for empty slow chains */
- if (empty_slot(table->slot[loc])) {
- DEBUG(DBG_HASH_MID, "replace_in_hash: slot NULL\n");
- return NULL; /* no entry to replace */
- }
-
- /*
- * search the chain
- */
- for (prev=NULL, cur=table->slot[loc]; cur != NULL;
- prev=cur, cur=next_hash(cur, table)) {
- /*
- * if we found the entry, stop
- */
- if ((cmp = stringcmp(keystr, cur->keystr, caseflag)) == 0) {
- new = new_hash_element(keystr, data, datalen, table);
- if (prev == NULL) {
- DEBUG(DBG_HASH_MID, "replace_in_hash: replaced at front\n");
- /* insert at beginning */
- replace_hash(table->slot[loc], cur, new);
- } else {
- DEBUG(DBG_HASH_MID, "replace_in_hash: replaced in middle\n");
- replace_hash(prev->succ, cur, new); /* insert in middle */
- }
- return cur;
- /* if we have gone past our entry, stop searching */
- } else if (cmp < 0) {
- break;
- }
- }
-
- /*
- * entry not found, nothing to replace
- */
- DEBUG(DBG_HASH_MID, "replace_in_hash: not found\n");
- return NULL;
- }
-
- /*
- * store_in_hash - store an existing element in a hash table
- *
- * inputs:
- * keystr - the key of the data to store
- * data - the data to accompany `keystr', or NULL if no data
- * datalen - the length of `data' in bytes, or 0 if no data
- * table - a pointer to the hash table which is being modified
- * output:
- * returns a pointer to the element that was replaced, or NULL
- * if no element was replaced. In any case the element is added.
- */
- struct hash *
- store_in_hash(keystr, data, datalen, table)
- char *keystr; /* the keystring to replace */
- char *data; /* the associated data if any */
- int datalen; /* length of data, 0 ==> no data */
- struct hash_table *table; /* the hash table to add it to */
- {
- register struct hash *cur; /* the current slot chain location */
- register struct hash *prev; /* the previous slot chain location */
- register int cmp; /* -1, 0, or 1 for < = > compare */
- register int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- int loc; /* the hash slot to add onto */
- struct hash *new; /* the new slot chain location */
-
- /*
- * firewall - watch for NULLs
- */
- if (keystr == NULL) {
- panic(EX_SOFTWARE, "store_in_hash: NULL keystr");
- }
- if (table == NULL) {
- panic(EX_SOFTWARE, "store_in_hash: NULL table");
- }
-
- /*
- * determine the slot on which this entry is to be added
- */
- caseflag = table->flags & HASH_CASEFOLD;
- loc = hash_string(keystr, table->len, caseflag);
- DEBUG2(DBG_HASH_LO, "store_in_hash: keystr: <%s> loc: %d\n", keystr, loc);
-
- /*
- * search the slot chain for our entry
- */
- /* special case for empty slow chains */
- if (empty_slot(table->slot[loc])) {
- DEBUG(DBG_HASH_MID, "store_in_hash: insert on NULL slot\n");
- new = new_hash_element(keystr, data, datalen, table);
- insert_hash(&table->slot[loc], new);
- return NULL;
- }
-
- /*
- * search the chain
- */
- for (prev=NULL, cur=table->slot[loc]; cur != NULL;
- prev=cur, cur=next_hash(cur, table)) {
- /*
- * if we found the entry, stop
- */
- if ((cmp = stringcmp(keystr, cur->keystr, caseflag)) == 0) {
- new = new_hash_element(keystr, data, datalen, table);
- if (prev == NULL) {
- DEBUG(DBG_HASH_MID, "store_in_hash: replaced at front\n");
- /* insert at beginning */
- replace_hash(table->slot[loc], cur, new);
- } else {
- DEBUG(DBG_HASH_MID, "store_in_hash: replaced in middle\n");
- replace_hash(prev->succ, cur, new); /* insert in middle */
- }
- return cur;
-
- /*
- * we are past the insertion point, insert before here and stop
- * note if we are inserting at the beginning a a chain or in the middle
- */
- } else if (cmp < 0) {
- new = new_hash_element(keystr, data, datalen, table);
- if (prev == NULL) {
- DEBUG(DBG_HASH_MID, "store_in_hash: insert at front\n");
- insert_hash(&table->slot[loc], new); /* insert at beginning */
- } else {
- DEBUG(DBG_HASH_MID, "store_in_hash: insert in middle\n");
- insert_hash(&prev->succ, new); /* insert in middle */
- }
- return NULL;
- }
- }
-
- /*
- * case: insertion at the end of the chain
- */
- DEBUG(DBG_HASH_MID, "store_in_hash: insert at END\n");
- new = new_hash_element(keystr, data, datalen, table);
- insert_hash(&prev->succ, new);
- return NULL;
- }
- #endif /* ETHEREAL_HASHDATA */
-
- /*
- * lookup_in_hash - lookup an element in a hash table and return
- * the associated data
- *
- * inputs:
- * keystr - the key of the data to add
- * data - pointer to a pointer, or 0 if no data is wanted
- * datalen - pointer to the data length, or NULL if no length is wanted
- * table - a pointer to the hash table which is being modified
- * output:
- * returns ALREADY_HASHED if `keystr' is already in the `table', or
- * NOT_HASHED the key was not found
- * data - if `data' was non-NULL points at the key's data or NULL
- * no data
- * datalen - if `datalen' was non-NULL, points at the length of the
- * key's data
- */
- int
- lookup_in_hash(keystr, data, datalen, table)
- char *keystr; /* the key to lookup */
- char **data; /* where to point at data, or NULL */
- int *datalen; /* where to place data len or NULL */
- struct hash_table *table; /* the hash table to add it to */
- {
- register struct hash *cur; /* the slot chain location */
- int loc; /* the hash slot to add onto */
- int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- int cmp; /* compare function result */
-
- /*
- * firewall - watch for NULLs
- */
- if (keystr == NULL) {
- panic(EX_SOFTWARE, "lookup_in_hash: NULL keystr");
- }
- if (table == NULL) {
- panic(EX_SOFTWARE, "lookup_in_hash: table is NULL");
- }
-
- /*
- * determine the hash slot to search on
- */
- caseflag = table->flags & HASH_CASEFOLD;
- loc = hash_string(keystr, table->len, caseflag);
- DEBUG2(DBG_HASH_LO, "lookup_in_hash: keystr: <%s> slot: %d\n",keystr,loc);
- /* watch out for empty chains, there is nothing on them */
- if (empty_slot(table->slot[loc])) {
- DEBUG(DBG_HASH_MID, "lookup_in_hash: found at slot END\n");
- return NOT_HASHED;
- }
-
- /*
- * search the chain
- */
- for (cur=table->slot[loc]; cur != NULL; cur=next_hash(cur, table)) {
- /*
- * if we found the entry, stop
- */
- if ((cmp = stringcmp(keystr, cur->keystr, caseflag)) == 0) {
- DEBUG(DBG_HASH_MID, "lookup_in_hash: found\n");
- /*
- * fill in the requested args
- */
- if (data != NULL) {
- *data = hash_data(cur);
- }
- if (datalen != NULL) {
- *datalen = cur->datalen;
- }
- return ALREADY_HASHED;
- /* if we have gone past our entry, stop searching */
- } else if (cmp < 0) {
- break;
- }
- }
-
- /* found nothing */
- DEBUG(DBG_HASH_MID, "lookup_in_hash: not found\n");
- return NOT_HASHED;
- }
-
- #ifdef ETHEREAL_HASHDATA
- /*
- * delete_from_hash - delete an element in the hash table
- *
- * inputs:
- * keystr - the key of the data to add
- * table - a pointer to the hash table which is being modified
- * output:
- * returns a pointer to the element that was deleted, or NULL
- * if no element was deleted due to `keystr' not in `table'
- */
- struct hash *
- delete_from_hash(keystr, table)
- char *keystr; /* the key to lookup */
- struct hash_table *table; /* the hash table to add it to */
- {
- register struct hash *cur; /* the slot chain location */
- register struct hash *prev; /* previous element */
- int loc; /* the hash slot to add onto */
- int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- int cmp; /* compare function result */
-
- /*
- * firewall - watch for NULLs
- */
- if (keystr == NULL) {
- panic(EX_SOFTWARE, "delete_from_hash: keystr is NULL");
- }
- if (table == NULL) {
- panic(EX_SOFTWARE, "delete_from_hash: table is NULL");
- }
-
- /*
- * determine the hash slot to search on
- */
- caseflag = table->flags & HASH_CASEFOLD;
- loc = hash_string(keystr, table->len, caseflag);
- DEBUG2(DBG_HASH_LO, "delete_from_hash: keystr: <%s> loc: %d\n",keystr,loc);
- /* watch out for empty chains, there is nothing on them */
- if (empty_slot(table->slot[loc])) {
- DEBUG(DBG_HASH_MID, "delete_from_hash: EMPTY slot\n");
- return NULL; /* key is not in the table */
- }
-
- /*
- * search the chain for the element to delete
- */
- for (prev=NULL, cur=table->slot[loc]; cur != NULL;
- prev=cur, cur=next_hash(cur, table)) {
- /*
- * if we found the entry, stop
- */
- if ((cmp = stringcmp(keystr, cur->keystr, caseflag)) == 0) {
- if (prev == NULL) {
- DEBUG(DBG_HASH_MID, "delete_from_hash: delete at front\n");
- /* delete at the beginning */
- delete_hash(&table->slot[loc], cur);
- } else {
- DEBUG(DBG_HASH_MID, "delete_from_hash: delete in middle\n");
- delete_hash(&prev->succ, cur); /* delete in middle */
- }
- return cur;
- /*
- * if we have gone past the entry, stop looking
- */
- } else if (cmp < 0) {
- DEBUG(DBG_HASH_MID, "delete_from_hash: past spot\n");
- break;
- }
- }
-
- /* found nothing */
- DEBUG(DBG_HASH_MID, "delete_from_hash: not found\n");
- return NULL; /* nothing was deleted */
- }
- #endif /* ETHEREAL_HASHDATA */
-
-
- /*
- * write_hash_table - write a hash table to a stream
- *
- * input:
- * tab - the hash table to write
- * stream - the stream on which to write
- */
- void
- write_hash_table(table, stream)
- struct hash_table *table; /* the table to write */
- FILE *stream; /* the stream on which to write table */
- {
- register int i; /* index */
- long tab_loc; /* file location of hash_table start */
- long loc; /* current location */
- int size; /* size of the current element */
- long offset; /* current offset within the file */
- long disc_succ; /* cur->succ as written to disc */
- struct hash *cur; /* the current hash element */
- struct hash_table *tab; /* disk copy of table */
-
- /*
- * firewalls
- */
- if (table == NULL || stream == NULL) {
- panic(EX_SOFTWARE, "write_hash_table: NULL arguments");
- }
- if (table->len <= 0) {
- panic(EX_SOFTWARE, "write_hash_table: table length: %d", table->len);
- }
- DEBUG2(DBG_HASH_LO, "write_hash_table: size: %d life: %d\n",
- table->len, table->life);
-
- /*
- * allocate a temporary disk copy of the hash table
- */
- tab = (struct hash_table *)xmalloc(table_size(table->len));
- tab->life = table->life; /* preserve life type */
- tab->len = table->len; /* preserve table length */
- tab->flags = table->flags; /* preserve flags - XXX all bits ??? */
- tab->prev = NULL; /* clear walking location */
- tab->indx = 0; /* clear walking slot index */
-
- /*
- * skip the hash table block
- */
- tab_loc = ftell(stream);
- if (fseek(stream, (long)table_size(table->len), 1) < 0) {
- panic(EX_IOERR, "write_hash_table: bad skip of %d over table",
- table_size(table->len));
- }
-
- /*
- * write out each hash table chain
- */
- for (i=0; i < table->len; i++) {
-
- /* don't write out chains that don't exist */
- if (empty_slot(table->slot[i])) {
- /* slot is empty, deal with it quickly */
- set_ptr(tab->slot[i], (int)NULL);
- continue;
- }
-
- /* note starting offset of hash chain */
- offset = ftell(stream) - tab_loc;
- if (is_odd(offset)) {
- panic(EX_IOERR, "write_hash_table: slot %d offset is odd:%ld",
- i, offset);
- }
- set_ptr(tab->slot[i], to_odd(offset));
-
- /* write up to the last chain element */
- for (cur=table->slot[i]; cur != NULL; cur=next_hash(cur, table)) {
- /* compute the current element length */
- size = hash_len(cur);
- if (is_odd(size)) {
- panic(EX_IOERR,
- "write_hash_table: size is odd:%ld for <%s>",
- offset, cur->keystr);
- }
- offset += size; /* note file movement */
- /* write the hash element disk successor element */
- disc_succ = (cur->succ == NULL) ? 0 : to_odd(offset);
- if (fwrite((char *)&disc_succ, sizeof(disc_succ), 1, stream) != 1) {
- panic(EX_IOERR,
- "write_hash_table: bad succ write <%s> on slot %d",
- cur->keystr, i);
- }
- /* write the rest of the hash element data */
- if (fwrite((char *)cur+sizeof(cur->succ), size-sizeof(cur->succ),
- 1, stream) != 1) {
- panic(EX_IOERR,
- "write_hash_table: bad write <%s> on slot %d",
- cur->keystr, i);
- }
- }
- }
-
- /*
- * write the hash table back in its place and return to our
- * current position
- */
- loc = ftell(stream); /* remember our current location */
- if (fseek(stream, (long)tab_loc, 0) < 0) {
- panic(EX_IOERR, "write_hash_table: bad skip back to table at %d",
- tab_loc);
- }
- if (fwrite((char *)tab, table_size(tab->len), 1, stream) != 1) {
- panic(EX_IOERR, "write_hash_table: bad table write");
- }
- if (fseek(stream, (long)loc, 0) < 0) {
- panic(EX_IOERR, "write_hash_table: bad end seek to %d", loc);
- }
-
- /*
- * free the temporary disk copy of the hash table
- */
- xfree((char *)tab);
- return;
- }
-
-
- #ifdef STANDALONE
-
- #include "varargs.h"
- #include <sys/types.h>
- #include <sys/stat.h>
-
- #define TABLE_LEN 3 /* use only a few slots to stress the chain code */
- #define INPUT_SIZE (70*1024) /* max input line */
-
- char *tempname = "/tmp/hashtestXXXXXX"; /* tempory hash file name */
-
- void
- main(argc, argv)
- int argc; /* arg count */
- char *argv[]; /* args */
- {
- int i; /* index */
- char buf[INPUT_SIZE+1]; /* the input buffer for stdin args */
- struct hash *cur; /* pointer to walk the table */
- struct hash_table *table; /* our allocated table */
- struct hash_table *tableic; /* allocated table without regard to case */
- void test(); /* test an with an element */
- void dump_hash_table(); /* dump the contents of the hash table */
- void hash_file_test(); /* perform hash file testing */
-
- /*
- * establish debug level
- */
- DEBUG(DBG_HASH_LO, "main: start\n");
- if (argc > 1 && strncmp(argv[1], "-d", 2) == 0) {
- /* we have a debug level, use it */
- if (argv[1][2] != '\0') {
- debug = atoi(&argv[1][2]);
- --argc;
- ++argv;
- } else if (argc > 2) {
- debug = atoi(argv[2]);
- argc -= 2;
- argv += 2;
- }
- }
- DEBUG1(DBG_HASH_LO, "main: debug level: %d\n", debug);
-
- /*
- * setup a hash table
- */
- table = new_hash_table(TABLE_LEN, (struct block *)NULL, HASH_CASEFOLD);
- tableic = new_hash_table(TABLE_LEN, (struct block *)NULL, 0);
-
- /*
- * special case: no args means read one arg per line
- */
- if (argc == 1) {
- while(fgets(buf, INPUT_SIZE, stdin) != NULL) {
- i = strlen(buf);
- buf[i-1] = '\0';
- DEBUG1(DBG_HASH_LO,
- "main: testing <%s> -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n", buf);
- test(buf, table);
- if ( debug >= DBG_HASH_HI ) {
- dump_hash_table(table);
- }
- DEBUG1(DBG_HASH_LO,
- "main: testing ignore case <%s> -*-*-*-*-*-*-*-*-*-\n", buf);
- test(buf, tableic);
- if ( debug >= DBG_HASH_HI ) {
- dump_hash_table(tableic);
- }
- }
-
- /*
- * hash each argument
- */
- } else {
- for (i=1; i < argc; ++i) {
- DEBUG1(DBG_HASH_LO,
- "main: testing <%s> -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-\n", buf);
- test(argv[i], table);
- if ( debug >= DBG_HASH_HI ) {
- dump_hash_table(table);
- }
- DEBUG1(DBG_HASH_LO,
- "main: testing ignore case <%s> -*-*-*-*-*-*-*-*-*-\n", buf);
- test(argv[i], tableic);
- if ( debug >= DBG_HASH_HI ) {
- dump_hash_table(tableic);
- }
- }
- }
-
- /*
- * test the file operations
- */
- DEBUG(DBG_HASH_LO, "main: hash_file_test\n");
- hash_file_test(table);
- DEBUG(DBG_HASH_LO, "main: hash_file_test ignore case\n");
- hash_file_test(tableic);
-
- /*
- * final cleanup
- */
- DEBUG(DBG_HASH_LO, "main: free memory\n");
- /* free the hash table elements */
- for (cur=walk_hash((struct hash *)NULL,table);
- cur != NULL;
- cur=walk_hash(cur,table))
- {
- free_hash_element(cur, 0, table); /* free without deletion */
- }
- free_hash_table(table); /* free up the memory */
- DEBUG(DBG_HASH_LO, "main: free memory ignore case\n");
- for (cur=walk_hash((struct hash *)NULL,tableic);
- cur != NULL;
- cur=walk_hash(cur,tableic))
- {
- free_hash_element(cur, 0, tableic); /* free without deletion */
- }
- free_hash_table(tableic); /* free up the memory */
- DEBUG(DBG_HASH_LO, "main: end\n");
- exit(EX_OK);
- }
-
- /*
- * perform various tests on a string
- */
- void
- test(buf, table)
- char *buf; /* the key to add */
- struct hash_table *table; /* our allocated table */
- {
- register struct hash *cur; /* the current hash entry */
- char *data; /* the data we stored */
- int datalen; /* length of data */
- int buflen; /* length of the buffer string + NULL */
- int i; /* index */
- register int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
-
- /* test adding */
- buflen = strlen(buf)+1;
- i = add_to_hash(buf, buf, buflen, table);
- DEBUG2(DBG_HASH_LO, "test: <%s> add: %d\n", buf, i);
-
- /* test the lookup function */
- DEBUG1(DBG_HASH_MID, "test: <%s> lookup\n", buf);
- caseflag = table->flags & HASH_CASEFOLD;
- if (lookup_in_hash(buf, &data, &datalen, table) != ALREADY_HASHED) {
- panic(EX_SOFTWARE,
- "test: add_to_hash lost <%s>", buf);
- } else if (stringcmp(buf, data, caseflag) != 0 || buflen != datalen) {
- panic(EX_SOFTWARE,
- "test: add_to_hash lookup <%s> != <%s>", buf, data);
- }
- i = add_to_hash(buf, buf, buflen, table);
- if (i != ALREADY_HASHED) {
- panic(EX_SOFTWARE, "test: add_to_hash returned: %d for #2\n", i);
- }
-
- /* test the delete function */
- DEBUG1(DBG_HASH_MID, "test: <%s> delete\n", buf);
- cur = delete_from_hash(buf, table); /* delete something that exists */
- if (cur == NULL) {
- panic(EX_SOFTWARE,
- "test: delete_from_hash unable to delete <%s>", buf);
- } else if (stringcmp(buf, hash_data(cur), caseflag) != 0 ||
- buflen != cur->datalen) {
- panic(EX_SOFTWARE,
- "test: delete_from_hash mis delete of <%s>", buf);
- } else {
- free_hash_element(cur, 0, table); /* free up memory */
- }
- DEBUG1(DBG_HASH_MID, "test: <%s> empty delete\n", buf);
- cur = delete_from_hash(buf, table); /* delete a nothing */
- if (cur != NULL) {
- panic(EX_SOFTWARE,
- "test: delete_from_hash ghost delete #2 of <%s>", buf);
- }
-
- /* test the store function */
- DEBUG1(DBG_HASH_MID, "test: <%s> store\n", buf);
- cur = store_in_hash(buf, buf, buflen, table);
- if (cur != NULL) {
- panic(EX_SOFTWARE,
- "test: store_in_hash ghost store of <%s>", buf);
- }
- DEBUG1(DBG_HASH_MID, "test: <%s> store #2\n", buf);
- cur = store_in_hash(buf, buf, buflen, table);
- if (cur == NULL) {
- panic(EX_SOFTWARE,
- "test: store_in_hash lost store #2 of <%s>", buf);
- } else if (stringcmp(buf, hash_data(cur), caseflag) != 0 ||
- buflen != cur->datalen) {
- panic(EX_SOFTWARE,
- "test: store_in_hash mis store #2 of <%s>", buf);
- } else {
- free_hash_element(cur, 0, table); /* free up memory */
- }
-
- /* test the replace function */
- DEBUG1(DBG_HASH_MID, "test: <%s> replace_in_hash\n", buf);
- cur = replace_in_hash(buf, buf, buflen, table);
- if (cur == NULL) {
- panic(EX_SOFTWARE,
- "test: replace_in_hash lost <%s>", buf);
- } else if (stringcmp(buf, hash_data(cur), caseflag) != 0 ||
- buflen != cur->datalen) {
- panic(EX_SOFTWARE,
- "test: replace_in_hash mis replace of <%s>", buf);
- } else {
- free_hash_element(cur, 0, table); /* free up memory */
- }
- DEBUG1(DBG_HASH_MID, "test: <%s> replace_in_hash empty\n", buf);
- cur = delete_from_hash(buf, table); /* delete something that exists */
- if (cur == NULL) {
- panic(EX_SOFTWARE,
- "test: delete_from_hash unable to delete #3 <%s>", buf);
- } else {
- free_hash_element(cur, 0, table); /* free up memory */
- }
- cur = replace_in_hash(buf, buf, buflen, table);
- if (cur != NULL) {
- panic(EX_SOFTWARE,
- "test: replace_in_hash ghost replace of <%s>", buf);
- } else {
- /* put it back for keeps */
- cur = store_in_hash(buf, buf, buflen, table);
- if (cur != NULL) {
- panic(EX_SOFTWARE,
- "test: store_in_hash store #3 lost <%s>", buf);
- }
- }
- }
-
- /*
- * dump_hash_table - dump the hash table
- */
- void
- dump_hash_table(table)
- struct hash_table *table; /* our allocated table */
- {
- int i; /* index */
- struct hash *cur; /* the current hash entry */
-
- /*
- * check the hash chains
- */
- for (i=0; i < TABLE_LEN; ++i) {
- DEBUG1(DBG_HASH_HI, "dump: slot[%d]:", i);
- /* check for empty slots */
- if (empty_slot(table->slot[i])) {
- DEBUG(DBG_HASH_HI, "Empty\n");
- continue;
- }
- for (cur=table->slot[i]; cur != NULL; cur=next_hash(cur, table)) {
- if (cur->keylen == 0) {
- DEBUG2(DBG_HASH_HI, " 0x%lx: <%s> :EOC ",
- (long)cur, cur->keystr);
- } else {
- DEBUG3(DBG_HASH_HI, " 0x%lx: <%s> :0x%lx ",
- (long)cur, cur->keystr, (long)cur->succ);
- }
- }
- DEBUG(DBG_HASH_HI, "\n");
- }
- }
-
- /*
- * hash_file_test - test the hash table file operations
- */
- void
- hash_file_test(table)
- struct hash_table *table; /* the hash table to operate on */
- {
- char *filename; /* the file to operate on */
- struct stat buf; /* file status buffer */
- struct hash *cur; /* current location in hash table */
- struct hash *cur2; /* current location in hash table2 */
- struct hash_table *table2; /* the hash table to operate on */
- int caseflag; /* 0 ==> use strcmp, 1 ==> strcmpic */
- char *template; /* template for mktemp */
- FILE *stream; /* the file stream */
- char *mktemp(); /* form a temp filename */
-
- /*
- * open up a file to perform hash table operations into
- */
- template = xmalloc(strlen(tempname)+1);
- strcpy(template, tempname);
- filename = mktemp(template);
- DEBUG1(DBG_HASH_LO, "hash_file_test: using <%s>\n", filename);
- stream = fopen(filename, "w");
- if (stream == NULL) {
- panic(EX_CANTCREAT, "hash_file_test: can not creat <%s>", filename);
- }
-
- /*
- * write out the hash table
- */
- write_hash_table(table, stream);
- if (fclose(stream) != 0) {
- panic(EX_IOERR, "hash_file_test: can not fclose\n");
- }
-
- /*
- * reread the hash table into memory
- */
- DEBUG1(DBG_HASH_MID, "hash_file_test: rereading <%s>\n", filename);
- stream = fopen(filename, "r");
- if (stream == NULL) {
- panic(EX_CANTCREAT, "hash_file_test: can not reopen <%s>", filename);
- }
- if (fstat(fileno(stream), &buf) < 0) {
- panic(EX_IOERR, "hash_file_test: can not stat <%s>", filename);
- }
- table2 = (struct hash_table *)xmalloc(buf.st_size * sizeof(char));
- if (fread((char *)table2,sizeof(char),buf.st_size,stream) != buf.st_size) {
- panic(EX_IOERR, "hash_file_test: can not reread <%s>", filename);
- }
-
- /*
- * walk thru each hash table and verify string/data
- */
- caseflag = table->flags & HASH_CASEFOLD;
- DEBUG(DBG_HASH_MID, "hash_file_test: verify\n");
- for (cur=walk_hash((struct hash *)NULL,table),
- cur2=walk_hash((struct hash *)NULL,table2);
- cur != NULL || cur2 != NULL;
- cur=walk_hash(cur,table), cur2=walk_hash(cur2,table2))
- {
- /* compare cur and cur2 */
- if (stringcmp(cur->keystr,cur2->keystr,caseflag) != 0) {
- panic(EX_SOFTWARE,
- "hash_file_test: key mismatch: <%s> != <%s>\n",
- cur->keystr, cur2->keystr);
- }
- if (cur->datalen != cur2->datalen) {
- panic(EX_SOFTWARE,
- "hash_file_test: key mismatch: %d != %d\n",
- cur->datalen, cur2->datalen);
- }
- if (memcmp(hash_data(cur), hash_data(cur2), cur->datalen) != 0) {
- panic(EX_SOFTWARE,
- "hash_file_test: data mismatch between <%s> and <%s>\n",
- cur->keystr, cur2->keystr);
- }
- }
-
- /*
- * cleanup
- */
- DEBUG(DBG_HASH_MID, "hash_file_test: cleanup\n");
- xfree(table2);
- }
-
- /*
- * panic - fake the panic routine
- *
- * define panic rather than using the external routines.
- */
- /*VARARGS2*/
- void
- panic(exitcode, fmt, va_alist)
- int exitcode; /* call exit(exitcode) */
- char *fmt; /* printf(3) format */
- va_dcl /* arguments for printf */
- {
- va_list ap;
-
- va_start(ap);
- (void)fprintf(stderr, "PANIC(%s): ", exitcode);
- (void)vfprintf(stderr, fmt, ap);
- putc('\n', stderr); /* fatal messages not \n terminated */
- va_end(ap);
-
- return_to_sender = TRUE;
- exit(exitcode);
- }
-
- #endif /* STANDALONE */
-